Wenatchee RS analyses

Preliminary analysis of Wenatchee River Chinook reproductive success data

Michael Ford 2.17.25

library(ggplot2)
library(cowplot)
library(MASS)
library(reshape2)

ped = read.csv("/Users/mike.ford/Documents/MikeFord/WenSpChkProp/FRANZWensp_analysis/pedigree_fitness_Tumwater_Grun_3.12.25.csv",stringsAsFactors = F)

### do some basic summaries
pedf = subset(ped,Origin.off == "Hatchery" | Origin.off == "Wild")
pedf = subset(pedf,!is.na(Origin.off))
pedf = subset(pedf,Sex_Final.off == "Male" | Sex_Final.off == "Female")
table(pedf$GRun.off)

      Spring       Summer Undetermined 
       53083         2062          107 
pedf = subset(pedf,GRun.off == "Spring" )
table(pedf$Final_Status.off)

                           ??Broodstock?? 
                                       34 
                               Broodstock 
                                     3452 
                                 Fallback 
                                      152 
                        Mortality/Surplus 
                                      552 
                 Released from Broodstock 
                                       66 
                 Released From Broodstock 
                                      222 
      Released From Broodstock & Fellback 
                                        2 
Spawned and then Released from Broodstock 
                                        2 
                         Spawning Grounds 
                                    48601 
pedf = subset(pedf,Final_Status.off == "Broodstock" | Final_Status.off == "Spawning Grounds")
table(pedf$Final_Status.off,pedf$Year.off)
                  
                   2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
  Broodstock        183  249  314  139  295  197  169  133   88   93  203  213
  Spawning Grounds 2547 3817 1943 3653 5577 4855 5181 3558 4652 3480 1618 1881
                  
                   2016 2017 2018 2019 2020 2021 2022 2023
  Broodstock        214  187  189   71  105  129  136  145
  Spawning Grounds  898  825 1297  294  361  589  914  661

Summarize the number of location and origin calls that we changed due to location/origin contradictions

Location changes

table(pedf$Final_Status.off,pedf$Final_Status_ped.off)
                  
                   Broodstock Spawning Grounds
  Broodstock             3445                7
  Spawning Grounds         58            48543

Origin Changes

table(pedf$Origin.off,pedf$Origin_ped.off)
          
           Hatchery  Wild
  Hatchery    37753   185
  Wild          157 13958

Annual counts by sex and origin

#counts per year by sex and origin
#pdf("year counts by sex and origin.pdf")
ggplot(data=pedf,aes(Year.off,fill=Origin_ped.off)) + geom_bar() + facet_wrap(~Sex_Final.off) + scale_fill_manual(values=c("darkseagreen","steelblue"))

#dev.off()

Annual run timing distributions

pedf$Date.off = as.Date(pedf$Date.off,format = '%m/%d/%y')
temp = paste("1/1/",pedf$Year.off,sep="")
temp = as.Date(temp,format="%m/%d/%Y")
dayofyear = pedf$Date.off - temp
pedf$Dayofyear.off = dayofyear

pedf$Date.sire = as.Date(pedf$Date.sire,format = '%m/%d/%y')
temp = paste("1/1/",pedf$Year.sire,sep="")
temp = as.Date(temp,format="%m/%d/%Y")
dayofyear = pedf$Date.sire - temp
pedf$Dayofyear.sire = dayofyear

pedf$Date.dam = as.Date(pedf$Date.dam,format = '%m/%d/%y')
temp = paste("1/1/",pedf$Year.dam,sep="")
temp = as.Date(temp,format="%m/%d/%Y")
dayofyear = pedf$Date.dam - temp
pedf$Dayofyear.dam = dayofyear

#pdf("run time by year.pdf")
ggplot(data=pedf,aes(y=as.factor(Year.off),x=Dayofyear.off)) + geom_violin(draw_quantiles = .5)
Don't know how to automatically pick scale for object of type <difftime>.
Defaulting to continuous.

#dev.off()

Annual length (POH) distribution, by sex

#pdf("POH by year and sex.pdf")
ggplot(data=pedf,aes(y=as.factor(Year.off),x=POH.off,fill=Sex_Final.off)) + geom_violin(draw_quantiles = .5) + scale_fill_manual(values=c("darkseagreen","steelblue"))
Warning: Removed 7 rows containing non-finite values (`stat_ydensity()`).

#dev.off()

Annual length (POH) distribution by origin, original

#pdf("POH by year and origin.pdf")
ggplot(data=pedf,aes(y=as.factor(Year.off),x=POH.off,fill=Origin.off)) + geom_violin(draw_quantiles = .5) + scale_fill_manual(values=c("darkseagreen","steelblue")) 
Warning: Removed 7 rows containing non-finite values (`stat_ydensity()`).

#dev.off()

Annual length (POH) distribution by origin, pedigree corrected

#pdf("POH by year and origin.pdf")
ggplot(data=pedf,aes(y=as.factor(Year.off),x=POH.off,fill=Origin_ped.off)) + geom_violin(draw_quantiles = .5) + scale_fill_manual(values=c("darkseagreen","steelblue")) 
Warning: Removed 7 rows containing non-finite values (`stat_ydensity()`).

#dev.off()

Dot plot of reproductive success (RS = adult offspring number) by location

# RS by year, origin, sex, and location
#pdf("RS by year and origin and final location.pdf")
ggplot(data=subset(pedf),aes(y=as.factor(Year.off),x=noff.off,color=Origin_ped.off)) + geom_jitter() + scale_color_manual(values=c("darkseagreen","steelblue")) + facet_wrap(~Final_Status_ped.off,scales="free")
Warning: Removed 172 rows containing missing values (`geom_point()`).

#dev.off()
#### make a basic RS and RRS tables, limited to comparisons of interest
makeRStable = function(d)
{
  rst = by(d,d[,c("Year.off")],FUN=function(x) t.test(noff.off~Origin_ped.off,data=x))
  rst
  rsn = by(d,d[,c("Year.off","Origin_ped.off")],FUN=function(x) nrow(x))
  dimnames(rsn)
  Nhat = rsn[,1]
  Nwild = rsn[,2]
  
  years = unlist(dimnames(rst)[1])
  hatmeans = unlist(lapply(rst,function(x) x$estimate[1]))
  wildmeans = unlist(lapply(rst,function(x) x$estimate[2]))
  pvals = unlist(lapply(rst,function(x) x$p.value))
  rrs = hatmeans/wildmeans
  basicrrs = data.frame(Year=years, Hatchery_N = Nhat, Hatchery_RS = hatmeans, Wild_N = Nwild, Wild_RS=wildmeans, RRS = rrs, P_value=pvals,row.names = NULL)
  return(basicrrs)
}

Table of RS and RRS by year, for males in the stream environment

d = subset(pedf,Year.off <= 2018 & Final_Status.off=="Spawning Grounds" & Sex_Final.off=="Male")
RStable = makeRStable(d)
RStable
   Year Hatchery_N Hatchery_RS Wild_N   Wild_RS       RRS      P_value
1  2004       1475  0.09152542    418 0.7655502 0.1195551 1.579701e-20
2  2005       1556  0.25578406    242 0.8057851 0.3174346 7.196090e-09
3  2006        742  0.49460916    206 1.1262136 0.4391788 3.991559e-06
4  2007       2778  0.21413122    176 0.6971429 0.3071554 5.053088e-08
5  2008       1961  0.38959714    276 1.5434783 0.2524151 2.175073e-06
6  2009       3021  0.11122145    346 0.5173410 0.2149867 1.692621e-09
7  2010       2322  0.16932357    474 0.4767932 0.3551300 7.540354e-12
8  2011       1528  0.20959264    679 0.5872781 0.3568882 1.278270e-13
9  2012       1295  0.11119691    551 0.3861566 0.2879580 5.663245e-15
10 2013       1242  0.14895330    326 0.3527607 0.4222502 1.454986e-07
11 2014        343  0.31085044    405 0.4480198 0.6938319 2.163231e-01
12 2015        405  0.21481481    326 0.3282209 0.6544825 1.568027e-02
13 2016         42  0.33333333    310 0.6580645 0.5065359 4.481112e-02
14 2017        227  0.97797357    169 0.9881657 0.9896858 9.456132e-01
15 2018        443  0.80769231    130 1.7307692 0.4666667 1.677760e-05
write.csv(RStable,"RRS table males stream origin_ped.csv")

RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS")

ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

Table of RS and RRS by year, for females in the stream environment

d = subset(pedf,Year.off <= 2018 & Final_Status.off=="Spawning Grounds" & Sex_Final.off=="Female")
RStable = makeRStable(d)
RStable
   Year Hatchery_N Hatchery_RS Wild_N   Wild_RS       RRS      P_value
1  2004        263   0.6083650    391 0.9181586 0.6625925 1.189784e-03
2  2005       1761   0.2759796    258 0.6976744 0.3955707 8.368574e-08
3  2006        729   0.6021948    266 1.0112782 0.5954789 1.996835e-04
4  2007        578   0.7577855    121 1.4049587 0.5393650 3.975098e-05
5  2008       3068   0.3044329    272 0.9632353 0.3160524 3.264713e-14
6  2009       1166   0.2787307    322 0.4844720 0.5753288 3.449681e-05
7  2010       2056   0.2232490    329 0.7112462 0.3138843 2.755320e-07
8  2011        860   0.3192982    491 1.0388548 0.3073560 2.778126e-07
9  2012       2171   0.1036389    635 0.2677165 0.3871217 1.620060e-11
10 2013       1572   0.1572247    340 0.2241888 0.7013049 3.662187e-02
11 2014        512   0.2357564    358 0.4649860 0.5070182 2.408309e-06
12 2015        758   0.1583113    392 0.2091837 0.7568055 1.300788e-01
13 2016        186   0.3763441    360 0.6000000 0.6272401 1.795463e-03
14 2017        257   0.8054475    172 1.1918605 0.6757901 3.265192e-03
15 2018        600   0.7868020    124 1.6612903 0.4736090 4.474933e-08
write.csv(RStable,"RRS table females stream origin_ped.csv")

RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS")

ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

Table of RS and RRS by year, for males in the broodstock

d = subset(pedf,Year.off <= 2013 & Final_Status.off=="Broodstock" & Sex_Final.off=="Male")
RStable = makeRStable(d)
RStable
   Year Hatchery_N Hatchery_RS Wild_N    Wild_RS       RRS      P_value
1  2004         32   56.656250     33 100.909091 0.5614583 2.402218e-04
2  2005         81   17.037037     47  26.510638 0.6426491 2.381994e-04
3  2006         90   28.466667     43  73.860465 0.3854114 2.074918e-08
4  2007         44   15.954545     16  28.125000 0.5672727 4.682208e-02
5  2008         99   19.979798     27  52.888889 0.3777693 1.186619e-07
6  2009         44   19.454545     33  29.969697 0.6491405 4.533106e-03
7  2010         55   16.545455     29  22.931034 0.7215311 1.577355e-01
8  2011         28   11.464286     30  16.233333 0.7062188 1.214676e-01
9  2012         17    7.235294     24   5.173913 1.3984182 1.639305e-01
10 2013          2   12.000000     43  10.720930 1.1193059 6.528617e-01
write.csv(RStable,"RRS males brood origin_ped.csv")

RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS")

ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

Table of RS and RRS by year, for females in the broodstock

d = subset(pedf,Year.off <= 2013 & Final_Status.off=="Broodstock" & Sex_Final.off=="Female")
RStable = makeRStable(d)
RStable
   Year Hatchery_N Hatchery_RS Wild_N  Wild_RS       RRS    P_value
1  2004         85   52.352941     33 42.18182 1.2411258 0.07000093
2  2005         86   21.360465     35 22.80000 0.9368625 0.57492538
3  2006        134   30.514925     47 37.27660 0.8186082 0.10439641
4  2007         55   15.363636     24 16.29167 0.9430365 0.72066757
5  2008        129   21.286822     40 19.25000 1.1058089 0.39146698
6  2009         70   15.857143     50 16.36000 0.9692630 0.79883014
7  2010         50   18.440000     35 18.25714 1.0100156 0.96270741
8  2011         44   10.000000     31 12.41935 0.8051948 0.29295468
9  2012         22    4.818182     25  4.40000 1.0950413 0.69548757
10 2013          5   10.000000     43 10.09302 0.9907834 0.96687237
write.csv(RStable,"RRS females brood origin_ped.csv")

RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS")

ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

#### look at the effects of parental origin (=grandparent spawning location) on RS
# first create a new column that provides parental origin
pedf$Origin.parents = ifelse(!is.na(pedf$Origin_ped.dam) & !is.na(pedf$Origin_ped.sire),paste(pedf$Origin_ped.dam,pedf$Origin_ped.sire,sep="-"),NA)


makeRSGEN2table = function(d)
{
  rst = by(d,d[,c("Year.off")],FUN=function(x) summary(lm(noff.off~Origin.parents,data=x)))
  rst[[1]]$coefficients
  rsn = by(d,d[,c("Year.off","Origin.parents")],FUN=function(x) nrow(x))
  dimnames(rsn)
  Nhathat = rsn[,1]
  Nhatwild = rsn[,2]
  Nwildhat = rsn[,3]
  Nwildwild = rsn[,4]
  
  years = unlist(dimnames(rst)[1])
  hathatmeans = unlist(lapply(rst,function(x) x$coefficients[1,1]))
  hatwildmeans = unlist(lapply(rst,function(x) x$coefficients[2,1])) + hathatmeans
  wildhatmeans = unlist(lapply(rst,function(x) x$coefficients[3,1])) + hathatmeans
  wildwildmeans = unlist(lapply(rst,function(x) x$coefficients[4,1])) + hathatmeans
  pvalhatwild = unlist(lapply(rst,function(x) x$coefficients[2,4]))
  pvalwildhat = unlist(lapply(rst,function(x) x$coefficients[3,4]))
  pvalwildwild = unlist(lapply(rst,function(x) x$coefficients[4,4]))
  rrsHW = hatwildmeans/hathatmeans
  rrsWH = wildhatmeans/hathatmeans
  rrsWW = wildwildmeans/hathatmeans
  basicrrs = data.frame(Year=years, HH_N = Nhathat,HW_N=Nhatwild,WH_N=Nwildhat,WW_N=Nwildwild,HH_RS=hathatmeans, HW_RS = hatwildmeans,WH_RS=wildhatmeans,WW_RS=wildwildmeans, RRS_HW = rrsHW,RRS_WH=rrsWH,RRS_WW=rrsWW, P_value_HW=pvalhatwild,P_value_WH=pvalwildhat,P_value_WW=pvalwildwild,row.names = NULL)
  return(basicrrs)
}

Annual table for 2 generation RRS in the stream environment - wild fish

d = subset(pedf,Year.off <= 2018 & Final_Status_ped.off=="Spawning Grounds" & Final_Status_ped.dam=="Spawning Grounds" &  Final_Status_ped.sire=="Spawning Grounds")
RStable = makeRSGEN2table(d)
RStable
   Year HH_N HW_N WH_N WW_N     HH_RS     HW_RS     WH_RS     WW_RS    RRS_HW
1  2007    3    7    6   13 0.6666667 0.4285714 0.5000000 0.2307692 0.6428571
2  2008   46   55   49  168 2.6304348 1.1818182 0.9387755 1.1250000 0.4492863
3  2009  248  116   77   62 0.4758065 0.6293103 0.3766234 0.5161290 1.3226184
4  2010  246   89   81   90 0.4837398 0.5280899 0.4691358 0.5666667 1.0916816
5  2011  310   66   72   36 0.6838710 0.4242424 1.1527778 0.7500000 0.6203545
6  2012  387  158   88  102 0.3333333 0.3734177 0.3068182 0.4215686 1.1202532
7  2013  203  122   58   59 0.3251232 0.3360656 0.3275862 0.2372881 1.0336562
8  2014  203  101   81   67 0.4679803 0.4356436 0.6296296 0.4328358 0.9309015
9  2015   93   81   57  129 0.2580645 0.2469136 0.3157895 0.3023256 0.9567901
10 2016   87   90   47   99 0.4597701 0.7333333 0.6595745 0.4848485 1.5950000
11 2017   85   54   10   32 1.0470588 1.2222222 0.9000000 1.3125000 1.1672909
12 2018   22   31   19   52 1.2272727 1.6774194 1.3157895 2.3653846 1.3667861
      RRS_WH    RRS_WW P_value_HW P_value_WH P_value_WW
1  0.7500000 0.3461538 0.62551922 0.73850980 0.33907077
2  0.3568899 0.4276860 0.04693163 0.02406439 0.01332699
3  0.7915474 1.0847458 0.21635523 0.49075340 0.79682804
4  0.9698101 1.1714286 0.67128682 0.89265276 0.42568463
5  1.6856656 1.0966981 0.22038044 0.02207416 0.80994000
6  0.9204545 1.2647059 0.54342107 0.74794726 0.25670145
7  1.0075758 0.7298408 0.87793357 0.97877872 0.33987926
8  1.3454191 0.9249018 0.77283936 0.18164075 0.78628705
9  1.2236842 1.1715116 0.91135253 0.60263225 0.62155821
10 1.4345745 1.0545455 0.04166010 0.21566217 0.84800535
11 0.8595506 1.2535112 0.49799612 0.76700299 0.38908142
12 1.0721248 1.9273504 0.42524601 0.88886463 0.02849238
RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS_HW" | variable=="RRS_WH" | variable=="RRS_WW")


ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

Annual table for 2 generation RRS in the stream environment - hatchery fish

d = subset(pedf,Year.off <= 2015 & Year.off >2006 & Final_Status_ped.off=="Spawning Grounds" & Final_Status_ped.dam=="Broodstock" &  Final_Status_ped.sire=="Broodstock")

RStable = makeRSGEN2table(d)
RStable
  Year HH_N HW_N WH_N WW_N     HH_RS      HW_RS     WH_RS     WW_RS    RRS_HW
1 2007  151 1033  370   68 0.1192053 0.23910939 0.1108108 0.1911765 2.0058621
2 2008  420 1797  671  199 0.3285714 0.34557596 0.3383010 0.3366834 1.0517529
3 2009  994 1453  685  478 0.1488934 0.13282863 0.1751825 0.1485356 0.8921058
4 2010 1327 1520  574  565 0.1590053 0.21184211 0.1515679 0.2230088 1.3322961
5 2011  602  446  240  162 0.2192691 0.23991031 0.1916667 0.2962963 1.0941364
6 2012  988  912  421  181 0.1123482 0.08991228 0.1092637 0.1933702 0.8003003
7 2013  658  496  349  546 0.1079027 0.17540323 0.1776504 0.1666667 1.6255679
8 2014  392   26   33  169 0.3316327 0.23076923 0.2121212 0.2721893 0.6958580
9 2015  265   18   16  292 0.1433962 0.16666667 0.3125000 0.1815068 1.1622807
     RRS_WH    RRS_WW  P_value_HW P_value_WH  P_value_WW
1 0.9295796 1.6037582 0.249238667 0.94197150 0.679858482
2 1.0296119 1.0246887 0.679674673 0.83694708 0.901272580
3 1.1765634 0.9975970 0.389876103 0.24349699 0.988700169
4 0.9532259 1.4025249 0.011394671 0.78872937 0.021874765
5 0.8741162 1.3512907 0.598606962 0.56455395 0.165682544
6 0.9725450 1.7211687 0.164465013 0.88011395 0.004377568
7 1.6463941 1.5446009 0.006561979 0.01165191 0.015048181
8 0.6396270 0.8207556 0.721831941 0.63743462 0.644280037
9 2.1792763 1.2657714 0.831243692 0.14321214 0.316550356
RStableL = melt(RStable,id.vars = "Year")
RStableL = subset(RStableL,variable == "RRS_HW" | variable=="RRS_WH" | variable=="RRS_WW")


ggplot(data = RStableL,aes(x=Year,y=value,fill=variable)) + geom_col(position=position_dodge()) + geom_hline(yintercept = 1)

Try to make a figure illustrating how origin and location change over time

d = subset(pedf,Year.off <=2018)
nrow(d)
[1] 48648
d = subset(d,!is.na(sire) & !is.na(dam))
nrow(d)
[1] 33497
# convert day of year to integer, and substract 100 -- is now days after April 10
d$Dayofyear.off = as.integer(d$Dayofyear.off-100)
d$Dayofyear.dam = as.integer(d$Dayofyear.dam-100)
d$Dayofyear.sire = as.integer(d$Dayofyear.sire-100)


d$Dayofyear.off = ifelse(d$Final_Status_ped.off=="Broodstock",-1*d$Dayofyear.off,d$Dayofyear.off)
d$Dayofyear.dam = ifelse(d$Final_Status_ped.dam=="Broodstock",-1*d$Dayofyear.dam,d$Dayofyear.dam)
d$Dayofyear.sire = ifelse(d$Final_Status_ped.sire=="Broodstock",-1*d$Dayofyear.sire,d$Dayofyear.sire)

nudge = .05
d$Year.off = ifelse(d$Origin_ped.off=="Hatchery",d$Year.off + nudge,d$Year.off-nudge)
d$Year.dam = ifelse(d$Origin_ped.dam=="Hatchery",d$Year.dam + nudge,d$Year.dam-nudge)
d$Year.sire = ifelse(d$Origin_ped.sire=="Hatchery",d$Year.sire + nudge,d$Year.sire-nudge)


plot = ggplot(data=d,aes(y=Year.off,x=Dayofyear.off,color=Origin_ped.off)) + geom_segment(xend = d$Dayofyear.sire,yend = d$Year.sire,color="olivedrab",alpha=.1,linewidth=.05) + ylab("Year") + geom_segment(xend = d$Dayofyear.dam,yend = d$Year.dam,color="olivedrab",alpha=.1,linewidth=.05) +  geom_point(alpha=.2,size=1,pch=16) + ylab("Year") + xlab("Day of year") + scale_color_manual(values=c("indianred","lightsteelblue"))
plot
Warning: Removed 5769 rows containing missing values (`geom_segment()`).
Warning: Removed 5101 rows containing missing values (`geom_segment()`).

pdf("Wenatchee_pedigree_figure1.pdf",width=10,height=8)
plot
Warning: Removed 5769 rows containing missing values (`geom_segment()`).
Removed 5101 rows containing missing values (`geom_segment()`).
dev.off()
quartz_off_screen 
                2 

A version with ID numbers instead of the day of year

d = subset(pedf,Year.off <=2018)
nrow(d)
[1] 48648
#d = subset(d,!is.na(sire) & !is.na(dam))

d$num = as.integer(substr(d$id,6,9)) + 1500
d$sirenum = as.integer(substr(d$sire,6,9)) + 1500
d$damnum = as.integer(substr(d$dam,6,9)) + 1500

d$num = ifelse(d$Final_Status_ped.off=="Broodstock",-1*d$num,d$num)
d$damnum = ifelse(d$Final_Status_ped.dam=="Broodstock",-1*d$damnum,d$damnum)
d$sirenum = ifelse(d$Final_Status_ped.sire=="Broodstock",-1*d$sirenum,d$sirenum)


nudge = .05
d$Year.off = ifelse(d$Origin_ped.off=="Hatchery",d$Year.off + nudge,d$Year.off-nudge)
d$Year.dam = ifelse(d$Origin_ped.dam=="Hatchery",d$Year.dam + nudge,d$Year.dam-nudge)
d$Year.sire = ifelse(d$Origin_ped.sire=="Hatchery",d$Year.sire + nudge,d$Year.sire-nudge)

plot = ggplot(data=d,aes(y=Year.off,x=num,color=Origin_ped.off,linewidth=Origin_ped.off,alpha=Origin_ped.off))  + geom_segment(xend = d$sirenum,yend = d$Year.sire) + ylab("Year") + geom_segment(xend = d$damnum,yend = d$Year.dam) +  geom_point(alpha=.6,size=.5,pch=16) + xlab("Spawners")  + ylab("Year") + scale_color_manual(values=c("indianred","royalblue"))  + theme_cowplot() + theme(axis.text.x=element_blank()) + annotate("text",x=-6000,y=2018,label="Broodstock") + annotate("text",x=6000,y=2018,label="Spawning grounds") + theme(legend.position = "none") + scale_linewidth_manual(values=c(.02,.03)) + scale_alpha_manual(values=c(.05,.07))
plot 
Warning: Removed 18763 rows containing missing values (`geom_segment()`).
Warning: Removed 17209 rows containing missing values (`geom_segment()`).

pdf("Wenatchee_pedigree_figure_idnums.pdf",width=10,height=5)
plot
Warning: Removed 18763 rows containing missing values (`geom_segment()`).
Removed 17209 rows containing missing values (`geom_segment()`).
dev.off()
quartz_off_screen 
                2 

Effects of number of wild (or hatchery) grandparents on RRS

### note - this file was created using the field-only origin - not ped-corrected
anc = read.csv("/Users/mike.ford/Documents/MikeFord/WenSpChkProp/FRANZWensp_analysis/Ancestor_origins_1.22.25.csv",stringsAsFactors = F)
pedf = merge(pedf,anc,by="id",all.x=T)
#write.csv(pedf,"pedf_with_anc.csv",row.names = F)

Identify individuals with varying number of hatchery or wild grandparents or other ancestors, for potential WGS analysis.

In the file below, the wild_parents_and_grandparents.csv file contains fish with two wild parents and four wild grandparents. They can have hatchery ancestry for generations deeper than grandparents. The file hat_parents_and_grandparents.csv is the same, but for hatchery parents and grandparents.

The files wild_lineages.csv and hatchery_lineages.csv contain fish with only wild or only hatchery ancestry. Note that ancestry information is incomplete, however.

Make some files for Joanna for potential WGS selection:

wild4 = subset(pedf,N.wild.gp == 4 & Origin_ped.sire=="Wild"&Origin_ped.dam=="Wild")
nrow(wild4)
[1] 26
table(wild4$Year.off)

2012 2013 2014 2016 2017 2018 2019 2021 2022 2023 
   5    2    3    1    1    7    1    1    4    1 
hat4 = subset(pedf,N.hatchery.gp == 4 & Origin_ped.sire=="Hatchery" & Origin_ped.dam=="Hatchery")
nrow(hat4)
[1] 832
table(hat4$Year.off)

2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 
   4   64  299    6    4   59   88   76   20   10  171    3    2    2   20    4 
#write.csv(wild4,"wild_parents_and_grandparents.csv",row.names = F)
#write.csv(hat4,"hat_parents_and_grandparents.csv",row.names = F)

wildonly = subset(pedf,anc.ratio==1 & Origin_ped.sire=="Wild" & Origin_ped.dam == "Wild")
nrow(wildonly)
[1] 202
hatonly = subset(pedf,anc.ratio==0 & Origin_ped.sire == "Hatchery" & Origin_ped.dam=="Hatchery")
table(wildonly$N.wild.anc)

 1  2  3  4  5  6  7 
42 46 98 12  2  1  1 
table(hatonly$N.hatchery.anc)

  1   2   3   4   5   6   7   8  10 
 91 740 227 450  30  51   5   2   2 
#write.csv(wildonly,"wild_lineages.csv",row.names = F)
#write.csv(hatonly,"hat_lineages.csv",row.names = F)

Both sexes, stream environment - number of wild grandparents

# both sexes, spawning grounds
d = subset(pedf,Year.off <= 2018 & Final_Status.off=="Spawning Grounds"  & Origin_ped.sire=="Wild"&Origin_ped.dam=="Wild")
nrow(d)
[1] 4284
table(d$N.wild.gp,d$Year.off)
   
    2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
  0    3   81  368  540  639  140   50  144   84  374   86  172   56
  1    0    0    0    0   14   25   58  302   90   97   31  145   91
  2    0    0    0    0    1   21   88  111   46   29   25   49   67
  3    0    0    0    0    2   12   81   60   17    6    7    6   51
  4    0    0    0    0    0    0    5    2    3    0    0    1    4
GPglm = glm.nb(noff.off~(N.wild.gp),data=d)
summary(GPglm)

Call:
glm.nb(formula = noff.off ~ (N.wild.gp), data = d, init.theta = 0.348368255, 
    link = log)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.8224  -0.7488  -0.7247  -0.7247   6.4137  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.93681    0.04220 -22.202   <2e-16 ***
N.wild.gp    0.09424    0.03759   2.507   0.0122 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.3484) family taken to be 1)

    Null deviance: 2685.4  on 4283  degrees of freedom
Residual deviance: 2679.1  on 4282  degrees of freedom
AIC: 7126.3

Number of Fisher Scoring iterations: 1

              Theta:  0.3484 
          Std. Err.:  0.0220 

 2 x log-likelihood:  -7120.2660 
ggplot(data=d,aes(x=as.factor(N.wild.gp),y=noff.off)) + geom_violin()

# females, spawning grounds
d = subset(pedf,Year.off <= 2018 & Final_Status.off=="Spawning Grounds" & N.wild.gp>0 & Sex_Final.off=="Female")
GPlm.female = lm(noff.off~as.factor(N.wild.gp),data=d)
summary(GPlm.female)

Call:
lm(formula = noff.off ~ as.factor(N.wild.gp), data = d)

Residuals:
   Min     1Q Median     3Q    Max 
-0.865 -0.309 -0.292 -0.288 31.691 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)            0.30929    0.01168  26.469  < 2e-16 ***
as.factor(N.wild.gp)2 -0.01769    0.02018  -0.877    0.381    
as.factor(N.wild.gp)3 -0.02083    0.03617  -0.576    0.565    
as.factor(N.wild.gp)4  0.55557    0.13509   4.113 3.95e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8186 on 7989 degrees of freedom
Multiple R-squared:  0.00229,   Adjusted R-squared:  0.001916 
F-statistic: 6.113 on 3 and 7989 DF,  p-value: 0.0003781

In the broodstock…

# both, brood
d = subset(pedf,Year.off <= 2013 & Final_Status.off=="Broodstock" & N.wild.gp>0)
GPlmbrood = lm(noff.off~as.factor(N.wild.gp),data=d)
summary(GPlmbrood)

Call:
lm(formula = noff.off ~ as.factor(N.wild.gp), data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.878  -9.878  -3.112   6.122  70.122 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)            16.8781     0.8144  20.724   <2e-16 ***
as.factor(N.wild.gp)2  -1.7656     1.8211  -0.970    0.333    
as.factor(N.wild.gp)3  -3.0210     3.2819  -0.921    0.358    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.57 on 418 degrees of freedom
Multiple R-squared:  0.003835,  Adjusted R-squared:  -0.0009317 
F-statistic: 0.8045 on 2 and 418 DF,  p-value: 0.448
# males, brood
d = subset(pedf,Year.off <= 2013 & Final_Status.off=="Broodstock" & N.wild.gp>0 & Sex_Final.off=="Male")
GPlmbrood.male = lm(noff.off~as.factor(N.wild.gp),data=d)
summary(GPlmbrood.male)

Call:
lm(formula = noff.off ~ as.factor(N.wild.gp), data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-18.057 -11.993  -4.262   7.943  68.943 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)             18.057      1.436  12.571   <2e-16 ***
as.factor(N.wild.gp)2   -1.085      3.185  -0.341    0.734    
as.factor(N.wild.gp)3   -4.590      4.632  -0.991    0.323    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.06 on 189 degrees of freedom
Multiple R-squared:  0.005386,  Adjusted R-squared:  -0.005139 
F-statistic: 0.5117 on 2 and 189 DF,  p-value: 0.6003
# females, brood
d = subset(pedf,Year.off <= 2013 & Final_Status.off=="Broodstock" & N.wild.gp>0 & Sex_Final.off=="Female")
GPlmbrood.female = lm(noff.off~as.factor(N.wild.gp),data=d)
summary(GPlmbrood.female)

Call:
lm(formula = noff.off ~ as.factor(N.wild.gp), data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-15.950  -8.591  -1.950   6.050  45.409 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)            15.9497     0.9053  17.618   <2e-16 ***
as.factor(N.wild.gp)2  -2.3588     2.0381  -1.157    0.248    
as.factor(N.wild.gp)3  -1.1164     5.0270  -0.222    0.824    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 12.11 on 226 degrees of freedom
Multiple R-squared:  0.005966,  Adjusted R-squared:  -0.002831 
F-statistic: 0.6782 on 2 and 226 DF,  p-value: 0.5086

Graphs of relationships between RS and traits

Male run timing, stream environment

d = subset(pedf,Year.off <= 2018 & Final_Status_ped.off=="Spawning Grounds" & Sex_Final.off=="Male")
rt.plot.males.stream = ggplot(data=d,aes(x=Dayofyear.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off) 
rt.plot.males.stream
Don't know how to automatically pick scale for object of type <difftime>.
Defaulting to continuous.
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).

Male POH, stream environment

POH.plot.males.stream = ggplot(data=d,aes(x=POH.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)
POH.plot.males.stream
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).

d = subset(pedf,Year.off <= 2013 & Final_Status_ped.off=="Broodstock" & Sex_Final.off=="Male")
rt.plot.males.brood = ggplot(data=d,aes(x=Dayofyear.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)
POH.plot.males.brood = ggplot(data=d,aes(x=POH.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)

Male run timing, in broodstock

rt.plot.males.brood
Don't know how to automatically pick scale for object of type <difftime>.
Defaulting to continuous.
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).

Male POH, in broostock

POH.plot.males.brood
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).

d = subset(pedf,Year.off <= 2018 & Final_Status_ped.off=="Spawning Grounds" & Sex_Final.off=="Female")
rt.plot.females.stream = ggplot(data=d,aes(x=Dayofyear.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off) 
POH.plot.females.stream = ggplot(data=d,aes(x=POH.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)

Female run timing, stream environment

rt.plot.females.stream
Don't know how to automatically pick scale for object of type <difftime>.
Defaulting to continuous.
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).

Female POH, stream environment

POH.plot.females.stream
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).

d = subset(pedf,Year.off <= 2013 & Final_Status_ped.off=="Broodstock" & Sex_Final.off=="Female")
rt.plot.females.brood = ggplot(data=d,aes(x=Dayofyear.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)
POH.plot.females.brood = ggplot(data=d,aes(x=POH.off,y=noff.off))  + geom_smooth(method="loess") + facet_wrap(~Year.off)

Female run timing, broodstock

rt.plot.females.brood
Don't know how to automatically pick scale for object of type <difftime>.
Defaulting to continuous.
`geom_smooth()` using formula = 'y ~ x'

Female POH, broostock

POH.plot.females.brood
`geom_smooth()` using formula = 'y ~ x'